/*** xrkmonitor license ***

   Copyright (c) 2020 by rockdeng

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


   字符云监控(xrkmonitor) 开源版 (c) 2019 by rockdeng
   当前版本：v1.0
   使用授权协议： apache license 2.0

   开源版主页：http://open.xrkmonitor.com
   云版本主页：http://xrkmonitor.com
  

   云版本为开源版提供永久免费告警通道支持，告警通道支持短信、邮件、
   微信等多种方式，欢迎使用

   外置监控插件 monitor_apache_log 功能:
        通过 apache 模块 log_config 监控 apache 网站流量、访问量等指标

****/


#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <unistd.h>
#include <math.h>
#include <netinet/in.h>
#include <sys/time.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <map>

#include <mt_report.h>
#include <sv_cfg.h>
#include "monitor_apache_log.h"
#include "xrk_monitor_apache_log.h"

// Attribute ids of this plugin, one slot per attribute name; the slots are
// filled in by InitAttrConfig().
int g_aryPluginAttr[XRK_PLUGIN_ATTRS_COUNT_MAX];

// URL scheme prefixes that OnGetReferer() strips before counting a domain.
static const std::string s_HttpReq("http://");
static const std::string s_HttpsReq("https://");
// Plugin name; Init() uses it to build the configuration file name.
static const std::string g_strPluginName(XRK_PLUGIN_NAME);

// Plugin configuration and the statistics collected between two reports.
CONFIG stConfig;
// Switch a file descriptor to non-blocking mode.
//
// iSock: descriptor to modify.
// Returns 0 on success, -1 when fcntl() fails (errno is left as set by fcntl).
int SetNBlock(int iSock)
{
    // F_GETFL reports failure with -1; OR-ing flag bits into that value and
    // handing it to F_SETFL would request a meaningless flag set.
    int iFlags = fcntl(iSock, F_GETFL, 0);
    if(iFlags < 0)
        return -1;

    iFlags |= O_NONBLOCK;
    iFlags |= O_NDELAY;
    if(fcntl(iSock, F_SETFL, iFlags) < 0)
        return -1;
    return 0;
}


// Tell whether a log format item is supported by this program.
// Returns true when pLogItem is exactly equal to one of the known items.
bool IsLogItemSupport(const char *pLogItem)
{
    // NULL-terminated table of the supported items.
    static const char *sItemSupports[] = { "%a", "%q", "%U", "%D", "%{Referer}i", "%B", NULL };

    const char **ppItem = sItemSupports;
    while(*ppItem != NULL)
    {
        if(strcmp(pLogItem, *ppItem) == 0)
            return true;
        ++ppItem;
    }
    return false;
}

//
// Split an apache log format string into its items and check that each item
// is supported by this program. Note that fmt must be identical to the format
// configured in apache.
// Example configuration:
//
// LogFormat "%a %D %q %B" xrk_log_fmt
// CustomLog "|/srv/www/xrkplugin/monitor_apache_log" xrk_log_fmt
//
// The monitor_apache_log configuration must then be:
//  APACHE_LOG_FMT set to "%a %D %q %B" as well, i.e. fmt is: "%a %D %q %B"
//
// Supported items are appended, in order, to stConfig.listLogItem.
// Returns 0 on success; SLOG_ERROR_LINE when an item is not supported or when
// the item list is empty afterwards.
//
int ParseApacheLogFmt(const char *fmt)
{
    // A double quote or any of these whitespace characters separates items.
    static const char *sDelims = "\" \t\n\r";

    // strtok_r writes NULs into the text it scans, so tokenize a copy.
    char *pCopy = strdup(fmt);
    if(pCopy != NULL)
    {
        char *pSave = NULL;
        for(char *pItem = strtok_r(pCopy, sDelims, &pSave); pItem != NULL;
            pItem = strtok_r(NULL, sDelims, &pSave))
        {
            if(!IsLogItemSupport(pItem)) {
                printf("not support log item:%s\n", pItem);
                free(pCopy);
                return SLOG_ERROR_LINE;
            }
            // The list stores its own copy of the text, so freeing pCopy is safe.
            stConfig.listLogItem.push_back(pItem);
        }
        free(pCopy);
    }

    if(stConfig.listLogItem.empty()) {
        printf("have no valid log item !\n");
        return SLOG_ERROR_LINE;
    }
    return 0;
}

// Read the plugin's attribute ids from the configuration file.
//
// The attribute names are taken from the space separated list
// XRK_PLUGIN_ALL_ATTRS; the integer configured for the n-th name is stored in
// g_aryPluginAttr[n].
//
// conf: path of the plugin configuration file.
// Returns 0 on success; ERROR_LINE when the name list cannot be copied, when
// an attribute cannot be read, or when too few attributes were processed.
int InitAttrConfig(std::string &conf)
{
    // strtok_r modifies the text it scans, so work on a heap copy of the list.
    char *pattrs = strdup(XRK_PLUGIN_ALL_ATTRS);
    if(pattrs == NULL)
        return ERROR_LINE;
    char *pmem = pattrs;   // kept for free(); pattrs is reset to NULL while tokenizing
    char *pattr = NULL, *psave = NULL;

    // Config file version check: a mismatch is only reported as a warning.
    char szConfVer[12] = {0};
    LoadConfig(conf.c_str(),
        "XRK_PLUGIN_CONFIG_FILE_VER", CFG_STRING, szConfVer, "0", sizeof(szConfVer),
        (void*)NULL);
    if(strcmp(szConfVer, XRK_PLUGIN_HEADER_FILE_VER)) {
        MtReport_Log_Warn("check config file version failed(%s != %s)", 
            szConfVer, XRK_PLUGIN_HEADER_FILE_VER);
    }

    const char *pfirst = NULL, *plast = NULL;
    int i = 0, iAttrId = 0;
    for(; i < XRK_PLUGIN_ATTRS_COUNT_MAX; i++)
    {
        pattr = strtok_r(pattrs, " ", &psave);
        if(NULL == pattr) {
            // NOTE(review): this extra increment lets a list that is exactly one
            // name short of XRK_PLUGIN_ATTRS_COUNT_MAX pass the count check
            // below - confirm that this is intended.
            i++;
            break;
        }

        if(LoadConfig(conf.c_str(), pattr, CFG_INT, &iAttrId, 0, (void*)NULL) < 0) { 
            MtReport_Log_Error("read attr:%s from file:%s failed", pattr, conf.c_str());
            break;
        }
        g_aryPluginAttr[i] = iAttrId;
        MtReport_Log_Debug("read attr config:%s, attr id:%d, ary index:%d", pattr, iAttrId, i);
        if(i==0)
            pfirst = pattr;
        else if(i+1 == XRK_PLUGIN_ATTRS_COUNT_MAX)
            plast = pattr;
        pattrs = NULL;   // later strtok_r calls continue from psave
    }
    if(i < XRK_PLUGIN_ATTRS_COUNT_MAX) {
        MtReport_Log_Error("read attr failed (%d, %d), pattr:%p", i, XRK_PLUGIN_ATTRS_COUNT_MAX, pattr);
        free(pmem);
        return ERROR_LINE;
    }

    // pfirst / plast are still NULL when the matching loop iteration never ran
    // (name list ended early, or only a single slot exists). A NULL argument
    // for %s is undefined behaviour in printf-style formatting, so substitute
    // a printable placeholder. Both point into pmem, hence log before free().
    MtReport_Log_Info("read attr count:%d, first:(%s,%d), last:(%s,%d)",
        i, (pfirst != NULL ? pfirst : "(null)"), g_aryPluginAttr[0],
        (plast != NULL ? plast : "(null)"), g_aryPluginAttr[i-1]);
    free(pmem);
    return 0;
}


// Initialise the plugin.
//
// Builds the configuration file path "<dir of argv[0]>/xrk_<plugin name>.conf",
// calls MtReport_Plus_Init with it, loads the apache log format and the IP
// check interval into stConfig, validates the log format and finally loads the
// attribute ids.
//
// argv: process argument vector; only argv[0] is used.
// Returns 0 on success, ERROR_LINE or SLOG_ERROR_LINE on failure (the caller
// treats a value < 0 as failure).
int Init(char *argv[])
{
    // Directory part of argv[0]: everything before the last '/'. Holding it in
    // a std::string means there is no heap copy to release on the error paths.
    // NOTE(review): when argv[0] contains no '/', the whole program name is
    // used as the directory, exactly as before - confirm this is acceptable.
    std::string strSelfDir(argv[0]);
    const std::string::size_type iSlash = strSelfDir.rfind('/');
    if(iSlash != std::string::npos)
        strSelfDir.erase(iSlash);

    // Read the configuration file
    std::string strPlugConfFile(strSelfDir);
    strPlugConfFile += "/xrk_";
    strPlugConfFile += g_strPluginName;
    strPlugConfFile += ".conf";

    int iRet = 0;
    iRet = MtReport_Plus_Init(
        strPlugConfFile.c_str(), XRK_PLUGIN_ID, XRK_PLUGIN_NAME, XRK_PLUGIN_HEADER_FILE_VER);
    if(iRet < 0 || g_mtReport.pMtShm == NULL) {
        return ERROR_LINE;
    }

    // The variadic list is closed with a pointer-typed sentinel, the same way
    // the other LoadConfig calls in this file do it.
    if((iRet=LoadConfig(strPlugConfFile.c_str(),
        // apache log format description
        "APACHE_LOG_FMT", CFG_LINE, stConfig.sApacheFmt, "%a %D %q %U %{Referer}i %B", sizeof(stConfig.sApacheFmt),
        // distinct-IP access count, statistics interval
        "IP_REQ_CHECK_TIME", CFG_INT, &stConfig.iIpReqCheckTime, 60,
        (void*)NULL)) < 0)
    {
        fprintf(stderr, "loadconfig from:%s failed, msg:%s !", strPlugConfFile.c_str(), strerror(errno));
        return ERROR_LINE;
    }

    if(ParseApacheLogFmt(stConfig.sApacheFmt) < 0)
    {
        fprintf(stderr, "check apache log format:%s failed !\n", stConfig.sApacheFmt);
        return SLOG_ERROR_LINE;
    }

    if((iRet=InitAttrConfig(strPlugConfFile)) < 0) 
    {
        fprintf(stderr, "read plugin attr info failed, ret:%d", iRet);
        return ERROR_LINE;
    }
    return 0;
}

// Scan forward from pread to the end of the current field.
// A field ends at a space, a newline or the terminating NUL. A space or
// newline that ends the field is overwritten with NUL and false is returned;
// true is returned when the end of the string was reached instead.
// pread is taken by value, so the caller's own pointer is not advanced.
inline bool TRY_NEXT_UNTIL_SPACE(char *pread)
{
    for(char *pCur = pread; *pCur != '\0'; ++pCur)
    {
        if(*pCur == ' ' || *pCur == '\n') {
            *pCur = '\0';
            return false;
        }
    }
    return true;
}

// Count one request for the domain contained in a referer value.
// pstart: NUL-terminated referer text; it is only read.
// The per-domain counters live in stConfig.mapReqRefers.
void OnGetReferer(const char *pstart)
{
    // Extract the domain from a private copy of the referer. At most
    // sizeof-1 bytes are copied and the last byte of the zero-initialised
    // static buffer is never written, so the copy is always NUL-terminated.
    static char s_Buf[1024] = {0};
    strncpy(s_Buf, pstart, sizeof(s_Buf)-1);

    // Skip a leading "http://" or "https://" scheme.
    char *pHost = s_Buf;
    if(strncmp(pHost, s_HttpReq.c_str(), s_HttpReq.size()) == 0)
        pHost += s_HttpReq.size();
    else if(strncmp(pHost, s_HttpsReq.c_str(), s_HttpsReq.size()) == 0)
        pHost += s_HttpsReq.size();

    // The domain stops at the first '/', when there is one.
    char *pSlash = strchr(pHost, '/');
    if(pSlash != NULL)
        *pSlash = '\0';

    std::map<std::string, int>::iterator itRef = stConfig.mapReqRefers.find(pHost);
    if(itRef != stConfig.mapReqRefers.end()) {
        itRef->second++;
        MtReport_Log_Debug("get request referer:%s, domain:%s, count:%d", 
            pstart, pHost, itRef->second);
        return;
    }

    // First request seen for this domain.
    stConfig.mapReqRefers[pHost] = 1;
    MtReport_Log_Debug("get request referer:%s, domain:%s, count:1", pstart, pHost);
}

// Report the response time distribution collected in stConfig.stRespTimeInfo
// and reset it. Every bucket with a count above zero is sent through
// MtReport_Str_Attr_Add under XRK_RESP_TIME_DIS together with its range label.
void ReportRespTimeInfo()
{
    struct RespBucket {
        int iIndex;           // slot in stConfig.stRespTimeInfo
        const char *pLabel;   // label reported with the count
    };
    // Listed in reporting order.
    const RespBucket aryBuckets[] = {
        { TIME_0_10,      "0 - 10 ms" },
        { TIME_10_30,     "10 - 30 ms" },
        { TIME_30_50,     "30 - 50 ms" },
        { TIME_50_100,    "50 - 100 ms" },
        { TIME_100_200,   "100 - 200 ms" },
        { TIME_200_500,   "200 - 500 ms" },
        { TIME_500_1000,  "500 - 1000 ms" },
        { TIME_1000_2000, "1000 - 2000 ms" },
        { TIME_be_2000,   "大于 2000 ms" }
    };
    const int iBucketCount = (int)(sizeof(aryBuckets)/sizeof(aryBuckets[0]));

    for(int i = 0; i < iBucketCount; i++)
    {
        const RespBucket &stBucket = aryBuckets[i];
        if(stConfig.stRespTimeInfo[stBucket.iIndex] > 0)
            MtReport_Str_Attr_Add(XRK_RESP_TIME_DIS, stBucket.pLabel, stConfig.stRespTimeInfo[stBucket.iIndex]);
    }

    // Start the next interval with empty buckets.
    memset(stConfig.stRespTimeInfo, 0, sizeof(stConfig.stRespTimeInfo));
}

// Record one request in the response time distribution
// (stConfig.stRespTimeInfo).
// iDealTimeWs: request handling time; it is divided by 1000 and rounded up
// before a bucket is chosen. The bucket labels used when reporting are in ms,
// so this is presumably a microsecond value - verify against the caller.
void AddRespTimeInfo(int iDealTimeWs)
{
    const int iMs = (int)ceil((float)iDealTimeWs/1000);

    // Values matching no bounded range (2000 and above, and also negative
    // ones) are counted in the open-ended last bucket.
    int iSlot = TIME_be_2000;
    if(iMs >= 0)
    {
        if(iMs < 10)
            iSlot = TIME_0_10;
        else if(iMs < 30)
            iSlot = TIME_10_30;
        else if(iMs < 50)
            iSlot = TIME_30_50;
        else if(iMs < 100)
            iSlot = TIME_50_100;
        else if(iMs < 200)
            iSlot = TIME_100_200;
        else if(iMs < 500)
            iSlot = TIME_200_500;
        else if(iMs < 1000)
            iSlot = TIME_500_1000;
        else if(iMs < 2000)
            iSlot = TIME_1000_2000;
    }
    stConfig.stRespTimeInfo[iSlot]++;
}

// Parse one apache log record and update the statistics in stConfig.
//
// The record is read field by field in the order of stConfig.listLogItem
// (the configured log format). Fields are delimited by a space or newline.
//
// pLog: NUL-terminated log text. It is modified in place: the delimiter that
//       ends each consumed field is overwritten with NUL.
//
// Returns early, without the post-processing at the end, when the item list
// contains an item this function does not handle.
void ParseApacheLog(char *pLog)
{
    char *pread = pLog;     // scan position
    char *pstart = pread;   // start of the field currently being read

    bool bEnd = false;                      // set once the end of pLog was reached
    bool bMaxResponseTimeChange = false;    // this record set a new maximum response time
    int iDealTimeWs = 0;                    // value of the %D field, 0 when absent
    std::map<std::string, int>::iterator it_map_si;
    std::list<std::string>::iterator it = stConfig.listLogItem.begin();
    std::string strReqIp;
    std::string strReqUri;
    std::string strReqUrl;
    std::string strTmp;

    for(; !bEnd &&  it != stConfig.listLogItem.end() && *pread != '\0'; it++)
    {
// Advance pread to the end of the current field. A space or newline ending
// the field is replaced by NUL so pstart becomes a C string holding just that
// field; reaching the end of the text sets bEnd instead.
#define TRY_NEXT_UNTIL_SPACE do { \
    while(*pread != ' ' && *pread != '\0' && *pread != '\n') { \
        pread++; \
    } \
    if( *pread == '\0') \
        bEnd = true; \
    else \
        *pread = '\0'; \
}while(0)

        // In every branch "pstart < pread" means the field is not empty.
        if(*it == "%a") { // source IP of the request
            TRY_NEXT_UNTIL_SPACE;
            if(pstart < pread) {
                it_map_si = stConfig.mapReqIps.find(pstart);
                if(it_map_si == stConfig.mapReqIps.end()) {
                    // count of distinct requesting IPs
                    MtReport_Attr_Add(XRK_ACC_IP_COUNT, 1);
                    stConfig.mapReqIps[pstart] = 1;
                    MtReport_Log_Debug("get new ip:%s request", pstart);
                }
                else {
                    it_map_si->second++;
                }
                strReqIp = pstart;
            }
        }
        else if(*it == "%q") { // request uri
            TRY_NEXT_UNTIL_SPACE;
            if(pstart < pread) {
                strReqUri = pstart;
                MtReport_Log_Debug("get request uri:%s", pstart);
            }
        }
        else if(*it == "%U") { // request url
            TRY_NEXT_UNTIL_SPACE;
            if(pstart < pread) {
                strReqUrl = pstart;
                MtReport_Log_Debug("get request url:%s", pstart);
            }
        }
        else if(*it == "%{Referer}i") { // request referer
            TRY_NEXT_UNTIL_SPACE;
            if(pstart < pread)
                OnGetReferer(pstart);
        }
        else if(*it == "%D") { // request response time; tracks the maximum and the total used for the average
            TRY_NEXT_UNTIL_SPACE;
            if(pstart < pread) {
                iDealTimeWs = atoi(pstart);
                stConfig.uiTotalResponseTime += iDealTimeWs;
                if(stConfig.iMaxResponseTime < iDealTimeWs)
                {
                    stConfig.iMaxResponseTime = iDealTimeWs;
                    bMaxResponseTimeChange = true;
                }
                MtReport_Log_Debug("get request response time:%d, max:%d, total:%u",
                    iDealTimeWs, stConfig.iMaxResponseTime, stConfig.uiTotalResponseTime);
            }
        }
        else if(*it == "%B") { // value is added to XRK_SITE_OUT_DATA
            TRY_NEXT_UNTIL_SPACE;
            if(pstart < pread) {
                MtReport_Attr_Add(XRK_SITE_OUT_DATA, atoi(pstart));
                MtReport_Log_Debug("send response:%d", atoi(pstart));
            }
        }
        else {
            MtReport_Log_Warn("find not support log item:%s", (*it).c_str());
            return;
        }

        // Step over the delimiter; the next field starts right behind it.
        pread++;
        pstart = pread;
#undef TRY_NEXT_UNTIL_SPACE
    }

    // Response time distribution (only for a positive %D value).
    if(iDealTimeWs > 0)
        AddRespTimeInfo(iDealTimeWs);
    // Per-url access counter.
    if(strReqUrl.size() > 0) {
        it_map_si = stConfig.mapReqRes.find(strReqUrl);
        if(it_map_si == stConfig.mapReqRes.end())
            stConfig.mapReqRes[strReqUrl] = 1;
        else
            it_map_si->second++;
    }

    // Remember which request and which client produced the new maximum.
    if(bMaxResponseTimeChange) {
        stConfig.strMaxReqInfo = strReqUrl+strReqUri;
        stConfig.strMaxReqIp = strReqIp;
    }
}

// Format an int as decimal text.
// The text is stored in a buffer shared by all calls, so the returned pointer
// is overwritten by the next call; copy the text if it has to be kept.
const char * itoa(int i)
{
    static char s_sText[20];
    const size_t iCapacity = sizeof(s_sText);

    snprintf(s_sText, iCapacity, "%d", i);
    return &s_sText[0];
}


// 上报每分钟的请求来源IP, 以及请求量
std::string & GetRequestIpStr()
{
    static std::string s_strIps;
    s_strIps.clear();
    std::map<std::string, int>::iterator it = stConfig.mapReqIps.begin();
    s_strIps = "ip count:";
    s_strIps += itoa(stConfig.mapReqIps.size());
    s_strIps += " | ";
    while(true)
    {
        s_strIps += it->first;
        s_strIps += "(";
        s_strIps += itoa(it->second);
        s_strIps += ")";

        // 根据来访 IP 做访客地域分布
        MtReport_Str_Attr_Add(XRK_ACC_REGION_DIS, it->first.c_str(), it->second);
        it++;
        if(it != stConfig.mapReqIps.end())
            s_strIps+=" | ";
        else
            break;
    }
    return s_strIps;
}

// Program entry point.
//
// After Init() succeeds the process loops forever:
//   - MtReport_Plus_Hello is called each round; a non-zero result runs
//     ./stop.sh and ends the loop,
//   - standard input is polled with select() (1.1 s timeout) and every log
//     line read from it is parsed and counted,
//   - once at least 60 seconds have passed since the last report, the
//     collected statistics are reported and reset.
// The loop also ends on a read error or when standard input reaches EOF.
//
// Returns SLOG_ERROR_LINE when Init() fails, 0 otherwise.
int main(int argc, char *argv[])
{
    if(Init(argv) < 0)
        return SLOG_ERROR_LINE;
    MtReport_Log_Debug("argc:%d, config:%d, apache log format:%s", argc, stConfig.iCfgId, stConfig.sApacheFmt);

    static char sApacheLog[1024*1024];
    size_t iLogBufLen = sizeof(sApacheLog)-1;   // keep one byte for the terminating NUL
    int iRead = 0, iRet = 0;
    time_t tmCur = 0;
    stConfig.dwStartStaticTime = time(NULL);

    struct timeval tm;
    fd_set fdsetr, fdsets;
    FD_ZERO(&fdsets);
    FD_SET(0, &fdsets);

    // put the standard input pipe into non-blocking mode
    SetNBlock(0);
    while(1)
    {
        tmCur = time(NULL);
        iRet = MtReport_Plus_Hello(tmCur);
        if(iRet) {
            MtReport_Log_Error("plugin check failed, ret:%d", iRet);
            system("./stop.sh > /dev/null 2>&1");
            break;
        }

        // select() may modify both the timeout and the fd set, so set them up
        // again on every round.
        tm.tv_sec = 1;
        tm.tv_usec = 100000;
        memcpy(&fdsetr, &fdsets, sizeof(fdsets));
        iRet = select(1, &fdsetr, NULL, NULL, &tm);
        if(iRet > 0 && FD_ISSET(0, &fdsetr))
        {
            iRead = read(0, sApacheLog, iLogBufLen);
            if(iRead < 0 && errno != EAGAIN)
            {
                MtReport_Log_Error("read log failed, msg:%s", strerror(errno));
                break;
            }
            else if(iRead == 0)
            {
                // EOF: the writing end of the pipe was closed. select() would
                // report the descriptor readable forever from now on, so stop
                // instead of spinning.
                MtReport_Log_Info("read log reach eof, exit");
                break;
            }
            else if(iRead > 0)
            {
                sApacheLog[iRead] = '\0';
                MtReport_Log_Debug("read log bytes:%d, log:%s", iRead, sApacheLog);

                // One read() can return several log lines at once; handle each
                // line as one request. ParseApacheLog overwrites separators
                // inside the line, so the line end is located first.
                char *pLine = sApacheLog;
                char *pEnd = sApacheLog + iRead;
                while(pLine < pEnd)
                {
                    char *pNewLine = (char*)memchr(pLine, '\n', pEnd - pLine);
                    if(pNewLine != NULL)
                        *pNewLine = '\0';

                    // Empty lines and lines starting with a space are skipped.
                    if(*pLine != '\0' && *pLine != ' ')
                    {
                        ParseApacheLog(pLine);
                        MtReport_Attr_Add(XRK_ACC_COUNT, 1);
                        stConfig.iReqCount++;
                    }

                    if(pNewLine == NULL)
                        break;
                    pLine = pNewLine + 1;
                }
            }
        }

        if(stConfig.dwStartStaticTime+60 <= tmCur)
        {
            // report the key data once per minute
            if(stConfig.iReqCount > 0) {
                MtReport_Attr_Add(XRK_SITE_ACC_TOTAL, stConfig.iReqCount);
                int iMax = (int)ceil((float)stConfig.iMaxResponseTime/1000);
                int iAvg = (int)ceil((float)stConfig.uiTotalResponseTime/stConfig.iReqCount/1000);
                MtReport_Attr_Set(XRK_ACC_MAX_RESP_TIME, iMax);
                MtReport_Attr_Set(XRK_ACC_AVG_RESP_TIME, iAvg);

                // Summary of the requests of this minute:
                // request count | max response time | average response time |
                // ip of the slowest request | url of the slowest request
                // The count is cast so the argument matches %lu whatever
                // integer type iReqCount is declared with.
                MtReport_Log_Info("request info | req:%lu | max:%d | avg:%d | ip:%s | url:%s",
                    (unsigned long)stConfig.iReqCount, iMax, iAvg, stConfig.strMaxReqIp.c_str(), 
                    stConfig.strMaxReqInfo.c_str());

                stConfig.uiTotalResponseTime = 0;
                stConfig.iMaxResponseTime = 0;
                stConfig.iReqCount = 0;

                // visitor region distribution
                const char *pReqInfo = GetRequestIpStr().c_str();
                MtReport_Log_Info("request ip info | %s", pReqInfo);
                stConfig.strMaxReqInfo.clear();

                // report the request referers
                std::map<std::string, int>::iterator it_map_si = stConfig.mapReqRefers.begin();
                for(; it_map_si != stConfig.mapReqRefers.end(); it_map_si++) 
                    MtReport_Str_Attr_Add(XRK_ACC_LINK_FROM, it_map_si->first.c_str(), it_map_si->second);
                
                // access count distribution per resource
                it_map_si = stConfig.mapReqRes.begin();
                for(; it_map_si != stConfig.mapReqRes.end(); it_map_si++) 
                    MtReport_Str_Attr_Add(XRK_RESOURCE_ACC_DIS, it_map_si->first.c_str(), it_map_si->second);

                ReportRespTimeInfo();
                stConfig.mapReqIps.clear();
                stConfig.mapReqRefers.clear();
                stConfig.mapReqRes.clear();
            }
            stConfig.dwStartStaticTime = tmCur;
        }
    }
    return 0;
}

